/*
Clean EU-SILC cross-sectional personal data (P-file), 2004-2023.
Extracts individual-level variables (demographics, education, employment
status, hours worked, health, benefits) for each year and appends the yearly
extracts into a single pooled cross-sectional file.

Uses:
$usern/Raw_Data/NO_CROSS/[year]/UDB_cNO[yy]P.csv  (years 2004-2023)

Creates:
$usern/Intermediate_Data/[year]_P_vars.dta  (years 2004-2023)
$usern/Intermediate_Data/0423_P_vars.dta
*/

*** Start from an empty session and define the project data root
* The usern global is the base path for every import and save in this script;
* it is built from the root global, which must be set before running.
clear
global usern "${root}/data/silc"

********************************************************************************
*** PERSONAL DATA (P-FILE)
********************************************************************************

* Build one cleaned personal-file extract per survey year (2004-2023).
* Each iteration imports the raw P-file CSV, renames and labels the variables
* of interest, maps the year-specific education and activity-status codings
* onto common variables, keeps the selected variables and saves
* [year]_P_vars.dta in Intermediate_Data.
forvalues year = 2004/2023 {
	local y = substr("`year'", -2, .) // last two digits, used in the raw file name
	display "`y'"
	display "`year'"

	import delimited "$usern/Raw_Data/NO_CROSS/`year'/UDB_cNO`y'P.csv", varnames(1) clear

	// ------------------------------------------------------------------
	// PB vars: identifiers and demographics
	// ------------------------------------------------------------------
	rename pb010 year
	label var year "Year of the survey"

	rename pb030 persid
	label var persid "hh id + pers number (2 digits)"

	rename pb040 persweight
	label var persweight "Cross-sectional weight"

	rename pb140 birthyear
	label var birthyear "Year of birth"

	rename pb150 sex
	label var sex "Sex"
	label define sex_lbl 1 "Male" 2 "Female"
	label values sex sex_lbl

	rename pb160 father_id
	label var father_id "Father ID"

	rename pb170 mother_id
	label var mother_id "Mother ID"

	rename pb180 spouse_id
	label var spouse_id "Spouse/Partner ID"

	rename pb190 marital
	label var marital "Marital status"
	label define marital_status_lbl 1 "Never married" 2 "Married" 3 "Separated" 4 "Widowed" 5 "Divorced"
	label values marital marital_status_lbl

	// NOTE(review): the 0/1 indicators in this script (married, college,
	// self_employed, bad_health, good_health) evaluate to 0 when the source
	// variable is missing. Confirm that this is intended.
	gen married = marital == 2
	label var married "Married"

	gen female = sex == 2
	label var female "Indicator for female (1=Female)"

	// ------------------------------------------------------------------
	// PE vars: education, harmonised to a single 0-5 scale
	// ------------------------------------------------------------------
	label define isced_lbl ///
		0 "Pre-primary education" ///
		1 "Primary education" ///
		2 "Lower secondary education" ///
		3 "(Upper) secondary education" ///
		4 "Post-secondary non-tertiary education" ///
		5 "First stage tertiary education"

	if `year' < 2014 {
		// Before 2014 pe040 is used as delivered
		rename pe040 educ
	}
	else if `year' < 2021 {
		// 2014-2020: collapse the three-digit pe040 codes by hundreds
		gen educ = .
		replace educ = 0 if pe040 == 000 // Less than primary education
		replace educ = 1 if inrange(pe040, 100, 199) // Primary education
		replace educ = 2 if inrange(pe040, 200, 299) // Lower secondary education
		replace educ = 3 if inrange(pe040, 300, 399) // (Upper) secondary education
		replace educ = 4 if inrange(pe040, 400, 499) // Post-secondary non-tertiary education
		replace educ = 5 if inrange(pe040, 500, 899) // Tertiary education (short cycle, bachelor's, master's, doctoral)
	}
	else {
		// 2021 onwards: same collapse, applied to pe041
		gen educ = .
		replace educ = 0 if pe041 == 0 // No formal education or below ISCED 1
		replace educ = 1 if inrange(pe041, 100, 199) // ISCED 1 Primary education
		replace educ = 2 if inrange(pe041, 200, 299) // ISCED 2 Lower secondary education
		replace educ = 3 if inrange(pe041, 300, 399) // ISCED 3 Upper secondary education
		// Range starts at 400 (not 440) so the whole 4xx block is covered,
		// consistent with the 2014-2020 mapping above
		replace educ = 4 if inrange(pe041, 400, 499) // ISCED 4 Post-secondary non-tertiary education
		replace educ = 5 if inrange(pe041, 500, 899) // ISCED 5-8 Tertiary education (Short-cycle, Bachelor's, Master's, Doctoral)
	}
	label var educ "Education"
	label values educ isced_lbl

	gen college = educ == 5
	label var college "Has college educ"

	// ------------------------------------------------------------------
	// PL vars: employment
	// ------------------------------------------------------------------
	// The status-in-employment variable is pl040 up to 2020 and pl040a after
	if `year' < 2021 {
		rename pl040 employment
	}
	else {
		rename pl040a employment
	}
	label var employment "Status in employment (main job)"
	label define employment_status_lbl ///
		1 "Self-employed with employees" ///
		2 "Self-employed without employees" ///
		3 "Employee" ///
		4 "Family worker (unpaid)"
	label values employment employment_status_lbl

	gen self_employed = 0
	replace self_employed = 1 if inlist(employment, 1, 2)
	label var self_employed "Self-employed"

	// Labor force status from the year-specific activity variable.
	// Stata treats missing values as larger than any number, so every
	// open-ended ">=" test is guarded with !missing(); otherwise respondents
	// with missing activity status would be coded as not in the labor force.
	gen lfp = .
	label var lfp "Labor force part. status"
	label define lfp_label ///
		1 "Employed" ///
		2 "Unemployed" ///
		3 "Not in labor force"

	if `year' <= 2009 {
		replace lfp = 1 if inlist(pl030, 1, 2)
		replace lfp = 2 if pl030 == 3
		replace lfp = 3 if pl030 >= 4 & !missing(pl030)
	}
	else if `year' < 2021 {
		replace lfp = 1 if inlist(pl031, 1, 2, 3, 4)
		replace lfp = 2 if pl031 == 5
		replace lfp = 3 if pl031 >= 6 & !missing(pl031)
	}
	else {
		replace lfp = 1 if pl032 == 1
		replace lfp = 2 if pl032 == 2
		replace lfp = 3 if pl032 >= 3 & !missing(pl032)
	}
	label values lfp lfp_label

	// change_job is only filled from pl160 through 2020; missing afterwards
	gen change_job = .
	if `year' <= 2020 {
		replace change_job = pl160
	}

	// NACE code: filled from pl111 (2008-2020) or pl111a (2021 onwards);
	// years before 2008 are left as "Missing".
	// Try to convert NACE Rev1 into Rev2 to include years before 2008
	gen nace = ""
	label var nace "NACE code (2-dig)"
	if `year' >= 2008 & `year' < 2021 {
		replace nace = pl111
	}
	else if `year' >= 2021 {
		replace nace = pl111a
	}
	replace nace = "Missing" if nace == ""

	rename pl060 hrs_work
	label var hrs_work "Hours usually worked per week"

	rename pl150 supervisor
	label var supervisor "Supervisory responsability during the main job"
	label define yesno 0 "No" 1 "Yes"
	label values supervisor yesno

	rename pl200 years_paidwork
	label var years_paidwork "Years spent in paid work" // Employee and self-employed

	// ------------------------------------------------------------------
	// Misc: health and benefits
	// ------------------------------------------------------------------
	rename ph010 self_perceived_health
	label var self_perceived_health "Self-perceived general health"
	label define health_lbl ///
		1 "Very good" ///
		2 "Good" ///
		3 "Fair (neither good nor bad)" ///
		4 "Bad" ///
		5 "Very bad"
	label values self_perceived_health health_lbl

	gen bad_health = 0
	replace bad_health = 1 if inlist(self_perceived_health, 4, 5)
	label var bad_health "Bad self-perceived health"

	gen good_health = 0
	replace good_health = 1 if inlist(self_perceived_health, 1, 2)
	label var good_health "Good self-perceived health"

	rename py050g cash_benefits
	label var cash_benefits "Cash benefits/Losses from selfempl"

	// Label goes on sick_benefits (it was previously applied to cash_benefits,
	// overwriting that variable's label)
	rename py120g sick_benefits
	label var sick_benefits "Sickness benefits"

	// ------------------------------------------------------------------
	// Keep the harmonised variables and save the yearly extract
	// ------------------------------------------------------------------
	keep year persid persweight birthyear sex father_id mother_id spouse_id marital married ///
	female educ college lfp employment self_employed hrs_work nace supervisor years_paidwork cash_benefits ///
	sick_benefits self_perceived_health bad_health good_health change_job

	// Drop records lacking an identifier, birth year or sex
	drop if missing(persid, birthyear, sex)

	save "$usern/Intermediate_Data/`year'_P_vars.dta", replace
}

*** Stack the yearly extracts into one file
* Start from the 2004 extract and append 2005-2023 in chronological order.
* NOTE(review): the force option lets append continue when a variable is
* string in one file and numeric in another; check variable types across
* years if values look unexpectedly missing in the combined file.
use "$usern/Intermediate_Data/2004_P_vars.dta", clear

forvalues yr = 2005/2023 {
	append using "$usern/Intermediate_Data/`yr'_P_vars.dta", force
}

save "$usern/Intermediate_Data/0423_P_vars.dta", replace



